import json
import pymssql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import time



class DrCiLabo_Shop(object):
    """Crawl the category tree of Taobao shops listed in the database.

    The object owns a Chrome WebDriver session and a pymssql connection.
    ``Main`` drives the whole run: log in with saved cookies, load the shop
    rows still to be processed, visit each shop's search page and parse its
    first/second level category links.
    """

    # XPath expressions for the category widgets on a shop search page.
    _TREE_XPATH = '//ul[contains(@class, "J_TCatsTree")]'
    _FIRST_XPATH = '//h4[contains(@class, "fst-cat-hd")]/a'
    _SECOND_XPATH = '//h4[contains(@class, "snd-cat-hd")]/a'

    def __init__(self):
        """Start the browser and open the database connection."""
        self.url = 'https://www.taobao.com/'
        options = webdriver.ChromeOptions()
        # options.add_argument('--headless')
        # Drop Chrome's "enable-automation" switch (original intent: hide
        # that the browser is driven by a script).
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        self.driver = webdriver.Chrome(options=options)
        self.wait = WebDriverWait(self.driver, 10)
        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to configuration / environment variables.
        try:
            self.db = pymssql.connect(host='192.168.0.186', user='sa', password='987987abc#', port='1433',
                                      database='CrawlorData.Global', charset="utf8")
        except Exception:
            # Do not leave an orphaned browser behind if the DB is unreachable.
            self.driver.quit()
            raise
        self.cursor = self.db.cursor()

    # Log in
    def login(self):
        """Open the home page, inject cookies from ``taobao.cookie`` and
        navigate to the shop search page.

        The cookie file is read as JSON and each item is passed to
        ``driver.add_cookie``. The home page is loaded first so the cookies
        are added while a page of ``self.url`` is open.
        """
        self.driver.get(self.url)
        with open("taobao.cookie", "r", encoding="utf8") as fp:
            cookies = json.load(fp)
        for item in cookies:
            self.driver.add_cookie(item)
        self.driver.get('https://store.taobao.com/search.htm?')

    # Wait for and return the elements matching an XPath
    def is_element(self, b):
        """Return all elements matching XPath ``b``.

        Waits (using ``self.wait``) until at least one matching element is
        present. On any failure the exception is printed and an empty
        string is returned, so the result is always safe to iterate.
        """
        try:
            return self.wait.until(EC.presence_of_all_elements_located((By.XPATH, b)))
        except Exception as a:
            print(a)
            return ""

    def _has_element(self, xpath):
        """Return True if ``xpath`` matches an element right now, else False."""
        try:
            self.driver.find_element(By.XPATH, xpath)
        except Exception as e:
            # str(e): concatenating the exception object itself raises
            # TypeError, which previously hid the message entirely.
            print("错误提示" + str(e))
            return False
        return True

    # Check whether the category tree exists
    def is_Tree(self):
        """Return True if the page contains the category tree list."""
        return self._has_element(self._TREE_XPATH)

    # Check whether first-level categories exist
    def is_First(self):
        """Return True if the page contains a first-level category link."""
        return self._has_element(self._FIRST_XPATH)

    # Check whether second-level categories exist
    def is_Second(self):
        """Return True if the page contains a second-level category link."""
        return self._has_element(self._SECOND_XPATH)

    # # Persist results (currently disabled)
    # def save_data(self, ID, Market, ShopID, SellerID, ShopSearchUrl, First_category, First_category_url, Second_category, Second_category_url):
    #     try:
    #         sql = 'insert into DrCiLabo_Shop_category(ID, Market, ShopID, SellerID, ShopSearchUrl, First_Category, \
    #         Second_Category, First_Category_url, Second_Category_url, Update_Time) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
    #         self.cursor.execute(sql, (str(ID), str(Market), str(ShopID), str(SellerID), str(ShopSearchUrl), str(First_category), str(Second_category), str(First_category_url), str(Second_category_url), str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))))
    #         self.db.commit()
    #     except Exception as e:
    #         print(e)
    #         self.db.rollback()

    # Load the shops to crawl from the database
    def get_url_data(self):
        """Fetch the shop rows to process into ``self.data``.

        Each row is ``(ID, Market, SellerID, ShopID)`` for shops whose
        Market is '淘宝市场' and whose id is not yet present in
        ``DrCiLabo_Shop_category``.
        """
        sql = "SELECT ID, Market, SellerID, ShopID  FROM [dbo].[DrCiLabo_Shop] where Market = '淘宝市场'  and id not in (\
                            select id from DrCiLabo_Shop_category)"
        self.cursor.execute(sql, )
        self.data = self.cursor.fetchall()

    # Parse the category information on the current page
    def page_parse(self):
        """Collect first/second level category names and URLs from the page.

        The four lists are zipped into ``self.zip1`` and also printed. A
        level that is absent on the page contributes empty lists instead of
        raising.
        """
        # Start from empty lists so a missing level cannot leave a name
        # unbound when the lists are zipped and printed below.
        First_category, First_category_url = [], []
        Second_category, Second_category_url = [], []
        if self.is_First():
            # Query once and read both text and href from the same elements.
            first_links = self.is_element(self._FIRST_XPATH)
            First_category = [link.text for link in first_links]
            First_category_url = [link.get_attribute('href') for link in first_links]
        if self.is_Second():
            second_links = self.is_element(self._SECOND_XPATH)
            Second_category = [link.text for link in second_links]
            Second_category_url = [link.get_attribute('href') for link in second_links]
        # NOTE(review): zip stops at the shortest list, so a page with no
        # second-level categories yields no tuples at all - confirm intended.
        self.zip1 = zip(First_category, First_category_url, Second_category, Second_category_url)
        print(First_category, First_category_url, Second_category, Second_category_url)
        # TODO: the disabled save_data path expected each list joined into a
        # single ';'-terminated string; rebuild that here when re-enabling it.

    # Visit every shop in turn
    def cycle_run(self):
        """Open each shop's search page from ``self.data`` and parse it.

        The third column of each row (SellerID in the query issued by
        ``get_url_data``) is used as ``user_number_id`` in the URL. Pages
        without a category tree are skipped.
        """
        for row in self.data:
            ShopSearchUrl = 'https://store.taobao.com/search.htm?user_number_id=' + str(row[2])
            self.driver.get(ShopSearchUrl)
            time.sleep(3)
            js = "var q=document.documentElement.scrollTop=10000"  # JS used to scroll the page
            self.driver.execute_script(js)  # scroll down to the bottom of the page
            time.sleep(1)
            if self.is_Tree():
                time.sleep(2)
                self.page_parse()
                # TODO: call save_data(...) here once persistence is re-enabled.
            time.sleep(5)

    # Entry point
    def Main(self):
        """Run the full crawl, then release the database and the browser."""
        try:
            self.login()
            self.get_url_data()
            self.cycle_run()
        finally:
            # Always clean up, even when a step above fails.
            try:
                self.db.close()
            finally:
                self.driver.quit()


if __name__ == '__main__':
    # Script entry point: build the crawler and run the whole job.
    DrCiLabo_Shop().Main()

